{ lib, pkgs, ... }:
let
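  # Configuration shared by the control, submit and compute nodes: cluster
  # layout, accounting via slurmdbd on the "dbd" node, and a common
  # (deliberately weak) munge key for authentication.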
  slurmconfig = {
    services.slurm = {
      controlMachine = "control";
      nodeName = [ "node[1-3] CPUs=1 State=UNKNOWN" ];
      partitionName = [ "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP" ];
      extraConfig = ''
        AccountingStorageHost=dbd
        AccountingStorageType=accounting_storage/slurmdbd
      '';
    };
    environment.systemPackages = [ mpitest ];
    networking.firewall.enable = false;
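    # Slurm authenticates nodes via munge; every node needs the same key.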
    systemd.tmpfiles.rules = [
      "f /etc/munge/munge.key 0400 munge munge - mungeverryweakkeybuteasytointegratoinatest"
    ];
  };

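  # A small MPI "hello world" binary used to exercise distributed job launch.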
  mpitest =
    let
      mpitestC = pkgs.writeText "mpitest.c" ''
        #include <stdio.h>
        #include <stdlib.h>
        #include <mpi.h>

        int
        main (int argc, char *argv[])
        {
          int rank, size, length;
          char name[512];

          MPI_Init (&argc, &argv);
          MPI_Comm_rank (MPI_COMM_WORLD, &rank);
          MPI_Comm_size (MPI_COMM_WORLD, &size);
          MPI_Get_processor_name (name, &length);

          if ( rank == 0 ) printf("size=%d\n", size);

          printf ("%s: hello world from process %d of %d\n", name, rank, size);

          MPI_Finalize ();

          return EXIT_SUCCESS;
        }
      '';
    in
    pkgs.runCommand "mpitest" { } ''
      mkdir -p $out/bin
      ${lib.getDev pkgs.mpi}/bin/mpicc ${mpitestC} -o $out/bin/mpitest
    '';

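  # A trivial batch script; the test submits it with `sbatch --wait` and then
  # checks its output file.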
  sbatchOutput = "/tmp/shared/sbatch.log";
  sbatchScript = pkgs.writeText "sbatchScript" ''
    #!${pkgs.runtimeShell}
    #SBATCH --nodes 1
    #SBATCH --ntasks 1
    #SBATCH --output ${sbatchOutput}

    echo "sbatch success"
  '';
in
{
  name = "slurm";

  meta.maintainers = [ lib.maintainers.markuskowa ];

  nodes =
    let
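      # Compute nodes only run slurmd (client.enable); node1-3 below use this.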
      computeNode =
        { ... }:
        {
          imports = [ slurmconfig ];
          # TODO: the slurmd and slurmctld ports should be configurable and
          # automatically allowed by the firewall.
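          # A possible sketch once such options exist (6817/6818 are Slurm's
          # default slurmctld/slurmd ports; not needed here since slurmconfig
          # disables the firewall):
          #   networking.firewall.allowedTCPPorts = [ 6817 6818 ];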
          services.slurm = {
            client.enable = true;
          };
        };
    in
    {

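      # Runs slurmctld, the central Slurm controller.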
      control =
        { ... }:
        {
          imports = [ slurmconfig ];
          services.slurm = {
            server.enable = true;
          };
        };

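      # Only provides the user-facing Slurm tools (srun, sbatch, sacct, ...).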
      submit =
        { ... }:
        {
          imports = [ slurmconfig ];
          services.slurm = {
            enableStools = true;
          };
        };

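      # Runs slurmdbd together with a local MariaDB instance for job accounting.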
      dbd =
        { pkgs, ... }:
        let
          passFile = pkgs.writeText "dbdpassword" "password123";
        in
        {
          networking.firewall.enable = false;
          systemd.tmpfiles.rules = [
            "f /etc/munge/munge.key 0400 munge munge - mungeverryweakkeybuteasytointegratoinatest"
          ];
          services.slurm.dbdserver = {
            enable = true;
            storagePassFile = "${passFile}";
          };
          services.mysql = {
            enable = true;
            package = pkgs.mariadb;
            initialScript = pkgs.writeText "mysql-init.sql" ''
              CREATE USER 'slurm'@'localhost' IDENTIFIED BY 'password123';
              GRANT ALL PRIVILEGES ON slurm_acct_db.* TO 'slurm'@'localhost';
            '';
            ensureDatabases = [ "slurm_acct_db" ];
            ensureUsers = [
              {
                ensurePermissions = {
                  "slurm_acct_db.*" = "ALL PRIVILEGES";
                };
                name = "slurm";
              }
            ];
          };
        };

      node1 = computeNode;
      node2 = computeNode;
      node3 = computeNode;
    };

  testScript = ''
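    # Machine method calls boot a VM on demand, so dbd and control come up
    # for the subtests below; start_all() then starts the remaining nodes.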
    with subtest("can_start_slurmdbd"):
        dbd.wait_for_unit("slurmdbd.service")
        dbd.wait_for_open_port(6819)

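    # Wait until the "default" cluster shows up in the accounting database.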
    with subtest("cluster_is_initialized"):
        control.wait_for_unit("multi-user.target")
        control.wait_for_unit("slurmctld.service")
        control.wait_until_succeeds("sacctmgr list cluster | awk '{ print $1 }' | grep default")

    start_all()

    with subtest("can_start_slurmd"):
        for node in [node1, node2, node3]:
            node.wait_for_unit("slurmd")

    # Test that the cluster works and can distribute jobs.
    submit.wait_for_unit("multi-user.target")

    with subtest("run_distributed_command"):
        # Run `hostname` on 3 nodes of the partition (i.e. on all 3 nodes).
        # The output must contain 3 different host names.
        submit.succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq")

        with subtest("check_slurm_dbd_job"):
            # Find the srun job submitted above in the accounting database.
            control.wait_until_succeeds("sacct | grep hostname")

    with subtest("run_PMIx_mpitest"):
        submit.succeed("srun -N 3 --mpi=pmix mpitest | grep size=3")

    with subtest("run_sbatch"):
        submit.succeed("sbatch --wait ${sbatchScript}")
        submit.succeed("grep 'sbatch success' ${sbatchOutput}")
  '';
}
